library("tidyverse")
── Attaching core tidyverse packages ──────────────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.0 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library("data.table")
Registered S3 method overwritten by 'data.table':
method from
print.data.table
data.table 1.14.10 using 1 threads (see ?getDTthreads). Latest news: r-datatable.com
**********
This installation of data.table has not detected OpenMP support. It should still work but in single-threaded mode.
This is a Mac. Please read https://mac.r-project.org/openmp/. Please engage with Apple and ask them for support. Check r-datatable.com for updates, and our Mac instructions here: https://github.com/Rdatatable/data.table/wiki/Installation. After several years of many reports of installation problems on Mac, it's time to gingerly point out that there have been no similar problems on Windows or Linux.
**********
Attaching package: ‘data.table’
The following objects are masked from ‘package:lubridate’:
hour, isoweek, mday, minute, month, quarter, second, wday, week, yday,
year
The following objects are masked from ‘package:dplyr’:
between, first, last
The following object is masked from ‘package:purrr’:
transpose
library("rtracklayer")
Loading required package: GenomicRanges
Warning: package ‘GenomicRanges’ was built under R version 4.2.2
Loading required package: stats4
Loading required package: BiocGenerics
Attaching package: ‘BiocGenerics’
The following objects are masked from ‘package:lubridate’:
intersect, setdiff, union
The following objects are masked from ‘package:dplyr’:
combine, intersect, setdiff, union
The following objects are masked from ‘package:stats’:
IQR, mad, sd, var, xtabs
The following objects are masked from ‘package:base’:
anyDuplicated, aperm, append, as.data.frame, basename, cbind, colnames,
dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep, grepl,
intersect, is.unsorted, lapply, Map, mapply, match, mget, order, paste,
pmax, pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rownames,
sapply, setdiff, sort, table, tapply, union, unique, unsplit, which.max,
which.min
Loading required package: S4Vectors
Warning: package ‘S4Vectors’ was built under R version 4.2.2
Attaching package: ‘S4Vectors’
The following objects are masked from ‘package:data.table’:
first, second
The following objects are masked from ‘package:lubridate’:
second, second<-
The following objects are masked from ‘package:dplyr’:
first, rename
The following object is masked from ‘package:tidyr’:
expand
The following objects are masked from ‘package:base’:
expand.grid, I, unname
Loading required package: IRanges
Attaching package: ‘IRanges’
The following object is masked from ‘package:data.table’:
shift
The following object is masked from ‘package:lubridate’:
%within%
The following objects are masked from ‘package:dplyr’:
collapse, desc, slice
The following object is masked from ‘package:purrr’:
reduce
Loading required package: GenomeInfoDb
Warning: package ‘GenomeInfoDb’ was built under R version 4.2.2
library("ggrastr")
library("glue")
Attaching package: ‘glue’
The following object is masked from ‘package:GenomicRanges’:
trim
The following object is masked from ‘package:IRanges’:
trim
library("DESeq2")
Loading required package: SummarizedExperiment
Loading required package: MatrixGenerics
Loading required package: matrixStats
Attaching package: ‘matrixStats’
The following object is masked from ‘package:dplyr’:
count
Attaching package: ‘MatrixGenerics’
The following objects are masked from ‘package:matrixStats’:
colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse, colCounts,
colCummaxs, colCummins, colCumprods, colCumsums, colDiffs, colIQRDiffs,
colIQRs, colLogSumExps, colMadDiffs, colMads, colMaxs, colMeans2,
colMedians, colMins, colOrderStats, colProds, colQuantiles, colRanges,
colRanks, colSdDiffs, colSds, colSums2, colTabulates, colVarDiffs,
colVars, colWeightedMads, colWeightedMeans, colWeightedMedians,
colWeightedSds, colWeightedVars, rowAlls, rowAnyNAs, rowAnys,
rowAvgsPerColSet, rowCollapse, rowCounts, rowCummaxs, rowCummins,
rowCumprods, rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks, rowSdDiffs,
rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars, rowWeightedMads,
rowWeightedMeans, rowWeightedMedians, rowWeightedSds, rowWeightedVars
Loading required package: Biobase
Welcome to Bioconductor
Vignettes contain introductory material; view with 'browseVignettes()'. To
cite Bioconductor, see 'citation("Biobase")', and for packages
'citation("pkgname")'.
Attaching package: ‘Biobase’
The following object is masked from ‘package:MatrixGenerics’:
rowMedians
The following objects are masked from ‘package:matrixStats’:
anyMissing, rowMedians
library("ggpubr")
library("wigglescout")
library("eulerr")
library("ggplot2")
library(corrplot)
corrplot 0.92 loaded
# export
result_folder = "../results/"
plot_folder = "../results/plots/"
stat_output = "../results/stats/"
combined_bigwigs <- list.files("../mendeley/Figure2","CTCF_ChIP-seq.+bw",
full.names = TRUE)
geo_bigwigs <- list.files("../mendeley/Figure2","GSM.+bw",
full.names = TRUE)
# colors
mypal <-c("cornflowerblue","orange","red2","darkgreen","#505050")
mypal <-c("#00619D","#A82A34","orange","seagreen","#505050")
mypal2 <-c("#00619D","#7FB0CE","#A82A34","#D39499","orange","#FFD55A","seagreen","#7DBA9C","#505050")
# diff signal function
bw_granges_diff_analysis <- function(granges_c1,
granges_c2,
label_c1,
label_c2,
estimate_size_factors = FALSE,
as_granges = FALSE) {
# Bind first, get numbers after
names_values <- NULL
fields <- names(mcols(granges_c1))
if ("name" %in% fields) {
names_values <- mcols(granges_c1)[["name"]]
granges_c1 <- granges_c1[, fields[fields != "name"]]
}
fields <- names(mcols(granges_c2))
if ("name" %in% fields) {
granges_c2 <- granges_c2[, fields[fields != "name"]]
}
cts_df <- cbind(data.frame(granges_c1), mcols(granges_c2))
if (!is.null(names_values)) {
rownames(cts_df) <- names_values
}
# Needs to drop non-complete cases and match rows
complete <- complete.cases(cts_df)
cts_df <- cts_df[complete, ]
values_df <- cts_df[, 6:ncol(cts_df)] %>% dplyr::select(where(is.numeric))
cts <- get_nreads_columns(values_df)
condition_labels <- c(rep(label_c1, length(mcols(granges_c1))), rep(label_c2, length(mcols(granges_c2))))
coldata <- data.frame(colnames(cts), condition = as.factor(condition_labels))
print(coldata)
dds <- DESeq2::DESeqDataSetFromMatrix(
countData = cts,
colData = coldata,
design = ~ condition,
rowRanges = granges_c1[complete, ]
)
if (estimate_size_factors == TRUE) {
dds <- DESeq2::estimateSizeFactors(dds)
}
else {
# Since files are scaled, we do not want to estimate size factors
sizeFactors(dds) <- c(rep(1, ncol(cts)))
}
dds <- DESeq2::estimateDispersions(dds)
dds <- DESeq2::nbinomWaldTest(dds)
if (as_granges) {
result <- DESeq2::results(dds, format = "GRanges", alpha = 0.01)
if (!is.null(names_values)) {
result$name <- names_values[complete]
}
}
else {
# result <- results(dds, format="DataFrame")
result <- dds
}
result
}
get_nreads_columns <- function(df, length_factor = 100) {
# Convert mean coverages to round integer read numbers
cts <- as.matrix(df)
cts <- as.matrix(cts[complete.cases(cts), ])
cts <- round(cts * length_factor)
cts
}
ctcf.and.G4.pro <- import("../peaks/G4 CTCF_with_promoters_sorted.bed")
ctcf.not.G4.pro <- import("../peaks/CTCF-only_with_promoters_sorted.bed")
ctcf.and.G4.npr <- import("../peaks/G4 CTCF_without_promoters_sorted.bed")
ctcf.not.G4.npr <- import("../peaks/CTCF-only_without_promoters_sorted.bed")
ctcf.and.G4.pro$class <- "CTCF_and_G4"
ctcf.not.G4.pro$class <- "CTCF_not_G4"
ctcf.and.G4.npr$class <- "CTCF_and_G4"
ctcf.not.G4.npr$class <- "CTCF_not_G4"
ctcf.and.G4.pro$pro <- "Pro"
ctcf.not.G4.pro$pro <- "Pro"
ctcf.and.G4.npr$pro <- "noPro"
ctcf.not.G4.npr$pro <- "noPro"
ctcf.and.G4 <- c(ctcf.and.G4.pro,ctcf.and.G4.npr)
ctcf.not.G4 <- c(ctcf.not.G4.pro,ctcf.not.G4.npr)
ctcf <- c(ctcf.and.G4,ctcf.not.G4)
ctcf <- sortSeqlevels(ctcf)
ctcf <- sort(ctcf)
names(ctcf) <- paste0(ctcf$class," ",ctcf$pro)
peaks_bed <- "../peaks/CTCF_G4_in_6_categories.bed"
export.bed(ctcf, peaks_bed)
ctcf.raw <- import("../peaks/CTCF_mES_peaks.narrowPeak")
G4.raw <- import("../peaks/G4_WT_peaks.narrowPeak")
ctcf.raw$class <- "CTCF"
G4.raw$class <- "G4"
ctcf.raw$pro <- "NA"
G4.raw$pro <- "NA"
df <- rbind(as.data.frame(ctcf.raw)[c(1,2,3,4,5,12,13)], as.data.frame(G4.raw)[c(1,2,3,4,5,12,13)],as.data.frame(ctcf,row.names = NULL))
ggdensity(df,x="width",y = "density",color = "class", fill="class", alpha=0.1, palette=mypal) + scale_x_continuous(limits = c(100,600))
Warning: Removed 3661 rows containing non-finite outside the scale range (`stat_density()`).
min(width(G4.raw))
[1] 233
min(width(ctcf.raw))
[1] 200
median(width(G4.raw))
[1] 316
median(width(ctcf.raw))
[1] 332
# Wulfridge CTCF ChIP-Seq mocks
mocks_bigwigs = c("../data/CutNTag_ChIP-Seq/bw/SRR23958387_GSM7116277_E14_Mock_CTCF_Rep1_Mus_musculus_ChIP-Seq.CPMnorm.bw",
"../data/CutNTag_ChIP-Seq/bw/SRR23958386_GSM7116278_E14_Mock_CTCF_Rep2_Mus_musculus_ChIP-Seq.CPMnorm.bw")
# PDS
# Wulfridge CTCF ChIP-Seq
pds_bigwigs = c("../data/CutNTag_ChIP-Seq/bw/SRR23958385_GSM7116279_E14_PDS_CTCF_Rep1_Mus_musculus_ChIP-Seq.CPMnorm.bw",
"../data/CutNTag_ChIP-Seq/bw/SRR23958384_GSM7116280_E14_PDS_CTCF_Rep2_Mus_musculus_ChIP-Seq.CPMnorm.bw")
pdc_bigwigs = c("../data/CutNTag_ChIP-Seq/bw/SRR23958383_GSM7116281_E14_PhenDC3_CTCF_Rep1_Mus_musculus_ChIP-Seq.CPMnorm.bw",
"../data/CutNTag_ChIP-Seq/bw/SRR23958382_GSM7116282_E14_PhenDC3_CTCF_Rep2_Mus_musculus_ChIP-Seq.CPMnorm.bw")
G4_bigwigs = c("../mendeley/Figure1/G4_CUT-Tag_thisStudy.bw")
cov.mocks <- bw_loci(mocks_bigwigs,peaks_bed,labels=c("mock_1","mock_2"))
cov.pds <- bw_loci(pds_bigwigs,peaks_bed,labels=c("PDS_1","PDS_2"))
cov.pdc <- bw_loci(pdc_bigwigs,peaks_bed,labels=c("PhenDC3_1","PhenDC3_2"))
cov.G4 <- bw_loci(G4_bigwigs,peaks_bed,labels=c("G4_NT"))
cov.combined <- bw_loci(combined_bigwigs,peaks_bed,labels=c("mock","PDS","PhenDC3"))
cov.geo <- bw_loci(geo_bigwigs,peaks_bed,labels=c("GEO_mock","GEO_PDS","GEO_PhenDC3"))
corr_df <- data.frame(data.frame(cov.combined)[,c(9,6,7,8)],data.frame(cov.geo)[,6:8],data.frame(cov.mocks)[,6:7],data.frame(cov.pds)[,6:7],data.frame(cov.pdc)[,6:7])
res <- cor(corr_df[,2:13])
corrplot(res, type = "upper", order = "hclust",tl.col = "black", tl.srt = 45,addCoef.col = 'black',tl.pos = 'd', cl.pos = 'n', col.lim=c(0.93, 1),is.corr = F, method = 'color')
p1 <- plot_bw_profile(c(mocks_bigwigs[1],pds_bigwigs[1],pdc_bigwigs[1]),loci = "../peaks/G4 CTCF_with_promoters_sorted.bed",mode = "center",verbose = F) + coord_cartesian(ylim=c(0,2.4))
p2 <- plot_bw_profile(c(mocks_bigwigs[2],pds_bigwigs[2],pdc_bigwigs[2]),loci = "../peaks/G4 CTCF_with_promoters_sorted.bed",mode = "center",verbose = F) + coord_cartesian(ylim=c(0,2.4))
p3 <- plot_bw_profile(bwfiles = combined_bigwigs,loci = "../peaks/G4 CTCF_with_promoters_sorted.bed",mode = "center",verbose = F) + coord_cartesian(ylim=c(0,2.4))
ggarrange(p1,p2,p3, ncol=3)
p1 <- plot_bw_profile(bwfiles = geo_bigwigs,loci = "../peaks/G4 CTCF_with_promoters_sorted.bed",mode = "center",verbose = F) + coord_cartesian(ylim=c(0,5))
p2 <- plot_bw_profile(bwfiles = combined_bigwigs,loci = "../peaks/G4 CTCF_with_promoters_sorted.bed",mode = "center",verbose = F) + coord_cartesian(ylim=c(0,2.6))
ggarrange(p1,p2, ncol=2)
p1 <- plot_bw_profile(bwfiles = geo_bigwigs,loci = "../peaks/CTCF-only_with_promoters_sorted.bed",mode = "center",verbose = F) + coord_cartesian(ylim=c(0,5))
Warning in .summarize_matrix(fg, label) :
Profile plot: 324 generated ( 0.0538563829787234 per locus)
Warning in .summarize_matrix(fg, label) :
Profile plot: 324 generated ( 0.0538563829787234 per locus)
Warning in .summarize_matrix(fg, label) :
Profile plot: 324 generated ( 0.0538563829787234 per locus)
p2 <- plot_bw_profile(bwfiles = combined_bigwigs,loci = "../peaks/CTCF-only_with_promoters_sorted.bed",mode = "center",verbose = F) + coord_cartesian(ylim=c(0,2.6))
Warning in .summarize_matrix(fg, label) :
Profile plot: 324 generated ( 0.0538563829787234 per locus)
Warning in .summarize_matrix(fg, label) :
Profile plot: 324 generated ( 0.0538563829787234 per locus)
Warning in .summarize_matrix(fg, label) :
Profile plot: 324 generated ( 0.0538563829787234 per locus)
ggarrange(p1,p2, ncol=2)
p1 <- plot_bw_loci_scatter(mocks_bigwigs[1],mocks_bigwigs[2], loci = "../peaks/G4 CTCF_with_promoters_sorted.bed", verbose = F)
p2 <- plot_bw_loci_scatter(pds_bigwigs[1],pds_bigwigs[2], loci = "../peaks/G4 CTCF_with_promoters_sorted.bed", verbose = F)
p3 <- plot_bw_loci_scatter(pdc_bigwigs[1],pdc_bigwigs[2], loci = "../peaks/G4 CTCF_with_promoters_sorted.bed", verbose = F)
ggarrange(p1,p2,p3, ncol=3)
p1 <- plot_bw_loci_scatter(combined_bigwigs[1],combined_bigwigs[2], loci = "../peaks/G4 CTCF_with_promoters_sorted.bed", verbose = F)
p2 <- plot_bw_loci_scatter(combined_bigwigs[2],combined_bigwigs[3], loci = "../peaks/G4 CTCF_with_promoters_sorted.bed", verbose = F)
p3 <- plot_bw_loci_scatter(combined_bigwigs[1],combined_bigwigs[3], loci = "../peaks/G4 CTCF_with_promoters_sorted.bed", verbose = F)
ggarrange(p1,p2,p3, ncol=3)
results <- data.frame(
as.data.frame(cov.mocks),
as.data.frame(cov.pds)[6:7],
as.data.frame(cov.pdc)[6:7],
as.data.frame(cov.G4)[6],
raw.lfc.pds_1 = log2(cov.pds$PDS_1 / cov.mocks$mock_1),
raw.lfc.pds_2 = log2(cov.pds$PDS_2 / cov.mocks$mock_2),
raw.lfc.pdc_1 = log2(cov.pds$PDS_1 / cov.mocks$mock_1),
raw.lfc.pdc_2 = log2(cov.pdc$PhenDC3_2 / cov.mocks$mock_2),
raw.lfc.pds = log2(rowMeans(as.data.frame(cov.pds)[6:7]) / rowMeans(as.data.frame(cov.mocks)[6:7])),
raw.lfc.pdc = log2(rowMeans(as.data.frame(cov.pdc)[6:7]) / rowMeans(as.data.frame(cov.mocks)[6:7])),
mean.mock = rowMeans(as.data.frame(cov.mocks)[6:7]),
mean.pds = rowMeans(as.data.frame(cov.pds)[6:7]),
mean.pdc = rowMeans(as.data.frame(cov.pdc)[6:7])
)
cov.mocks$name <- NULL
cov.pds$name <- NULL
cov.pdc$name <- NULL
de_pds <- bw_granges_diff_analysis(cov.mocks, cov.pds, "Mock", "PDS")
converting counts to integer mode
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
lfc_pds = DESeq2::lfcShrink(de_pds, coef = "condition_PDS_vs_Mock", type = "apeglm")
using 'apeglm' for LFC shrinkage. If used in published research, please cite:
Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
sequence count data: removing the noise and preserving large differences.
Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
de_pdc <- bw_granges_diff_analysis(cov.mocks, cov.pdc, "Mock", "PhenDC3")
converting counts to integer mode
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
lfc_pdc = DESeq2::lfcShrink(de_pdc, coef = "condition_PhenDC3_vs_Mock", type = "apeglm")
using 'apeglm' for LFC shrinkage. If used in published research, please cite:
Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
sequence count data: removing the noise and preserving large differences.
Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
results$deseq.lfc.pds <- results(de_pds)$log2FoldChange
results$deseq.lfcs.pds <- lfc_pds$log2FoldChange
results$deseq.padj.pds <- lfc_pds$padj
results$deseq.mean.pds <- log2(lfc_pds$baseMean)
results$deseq.sig.pds <- lfc_pds$pvalue < 0.05
results$deseq.lfc.pdc <- results(de_pdc)$log2FoldChange
results$deseq.lfcs.pdc <- lfc_pdc$log2FoldChange
results$deseq.padj.pdc <- lfc_pdc$padj
results$deseq.mean.pdc <- log2(lfc_pdc$baseMean)
results$deseq.sig.pdc <- lfc_pdc$pvalue < 0.05
results$class <- gsub(" .+", "", results$name)
results$pro <- gsub(".+ ", "", results$name)
results$pro <- factor(results$pro, levels = c("Pro", "noPro"))
results$class <- factor(results$class, levels = c("CTCF_and_G4", "CTCF_not_G4"))
results$log2.mock <- log2(results$mean.mock)
results$log2.pds <- log2(results$mean.pds)
results$log2.pdc <- log2(results$mean.pdc)
results$log2.G4 <- log2(results$G4_NT)
results$deseq.sigup.pds <- results$deseq.sig.pds &
results$deseq.lfc.pds > 0
results$deseq.sigup.pdc <- results$deseq.sig.pdc &
results$deseq.lfc.pdc > 0
write.table(results, glue("{result_folder}foldchange_results.txt"))
table(results$name)
CTCF_and_G4 noPro CTCF_and_G4 Pro CTCF_not_G4 noPro CTCF_not_G4 Pro
988 1300 43309 6016
#results <- read.table("foldchange_results.txt")
results$class <- factor(results$class, levels = c("CTCF_and_G4", "CTCF_not_G4"))
p <- ggviolin(
results,
x = "class",
y = "mean.mock",
fill = "class",
palette = mypal,
add = "median_iqr"
) + coord_cartesian(ylim = c(0, 10))
annotate_figure(p, fig.lab = "CTCF signal by class in mock condition, mean of two reps, median+iqr", fig.lab.size = 6)
ggsave(glue("{plot_folder}Violin_CTCF_classes.pdf"), last_plot())
Saving 3 x 3 in image
p <- ggviolin(
results,
x = "class",
y = "mean.mock",
fill = "pro",
palette = mypal,
add = "median_iqr"
) + coord_cartesian(ylim = c(0, 10))
annotate_figure(p, fig.lab = "CTCF signal by class in mock condition, mean of two reps, median+iqr", fig.lab.size = 6)
ggsave(glue("{plot_folder}Violin_CTCF_classes_pro.pdf"),
last_plot())
Saving 3 x 3 in image
mdf <- reshape2::melt(dplyr::select(
results,
c(
"class",
"mock_1",
"mock_2",
"PDS_1",
"PDS_2",
"PhenDC3_1",
"PhenDC3_2"
)
))
Using class as id variables
ggboxplot(
mdf,
x = "variable",
y = "value",
fill = "class",
palette = mypal
) + coord_cartesian(ylim = c(0, 10))
ggsave(glue("{plot_folder}Boxplot_CTCF_reps.pdf"), last_plot())
Saving 5 x 3 in image
mdf <- reshape2::melt(dplyr::select(
results,
c(
"class",
"mock_1",
"mock_2",
"PDS_1",
"PDS_2",
"PhenDC3_1",
"PhenDC3_2"
)
))
Using class as id variables
mdf_stats_class = compare_means(value ~ class, group.by = "variable", data = mdf)
ggviolin(
mdf,
x = "variable",
y = "value",
fill = "class",
palette = mypal,
add = "median_iqr"
) + coord_cartesian(ylim = c(0, 10))
ggsave(glue("{plot_folder}Violin_CTCF_reps.pdf"), last_plot())
Saving 5 x 3 in image
mdf <- reshape2::melt(dplyr::select(results, c(
"class", "deseq.lfc.pds", "deseq.lfc.pdc"
)))
Using class as id variables
p <- ggviolin(
mdf,
x = "variable",
y = "value",
fill = "class",
palette = mypal,
add = "median_iqr"
) + geom_hline(yintercept = 0, linetype = "dotted") + coord_cartesian(ylim =
c(-3, 3)) + stat_compare_means(aes(group = class), label.y = 3, size = 2)
annotate_figure(p, fig.lab = "DESEq foldchange mean treat vs mean mock, median+iqr", fig.lab.size = 6)
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_summary()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_compare_means()`).
ggsave(
glue("{plot_folder}Violin_CTCF_lfc.pdf"),
last_plot(),
width = 5,
height = 5
)
p <- ggboxplot(
mdf,
x = "variable",
y = "value",
fill = "class",
palette = mypal
) + geom_hline(yintercept = 0, linetype = "dotted") + coord_cartesian(ylim =
c(-2, 2))
annotate_figure(p, fig.lab = "DESEq foldchange mean treat vs mean mock, median+iqr", fig.lab.size = 6)
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_boxplot()`).
ggsave(glue("{plot_folder}Boxplot_CTCF_lfc.pdf"), last_plot())
Saving 3 x 3 in image
mdf <- reshape2::melt(dplyr::select(results, c(
"pro", "class", "deseq.lfc.pds", "deseq.lfc.pdc"
)))
Using pro, class as id variables
mdf$pro <- factor(mdf$pro, levels = c("Pro", "noPro"))
mdf$x <- as.factor(paste0(mdf$class, " ", mdf$variable))
mdf$x <- factor(mdf$x, levels = levels(mdf$x)[c(2, 1, 4, 3)])
p <- ggviolin(
mdf,
x = "x",
y = "value",
fill = "class",
palette = mypal,
add = "median_iqr",
facet.by = "pro"
) + geom_hline(yintercept = 0, linetype = "dotted") + coord_cartesian(ylim =
c(-2, 2)) + theme(axis.text.x = element_text(angle = 90, hjust = 1))
annotate_figure(p, fig.lab = "DESEq foldchange mean treat vs mean mock, median+iqr", fig.lab.size = 6)
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_summary()`).
ggsave(glue("{plot_folder}Violin_CTCF_lfc_pro.pdf"), last_plot())
Saving 5 x 6 in image
med <- aggregate(deseq.lfc.pds ~ class,
data = results,
FUN = "median",
na.rm = T)
med$deseq.fc.pds <- 2 ^ med$deseq.lfc.pds
med
med <- aggregate(deseq.lfc.pds ~ name,
data = results,
FUN = "median",
na.rm = T)
med$deseq.fc.pds <- 2 ^ med$deseq.lfc.pds
med
med <- aggregate(deseq.lfc.pds ~ class,
data = results,
FUN = "median",
na.rm = T)
med$deseq.fc.pds <- 2 ^ med$deseq.lfc.pds
med
med <- aggregate(deseq.lfc.pdc ~ name,
data = results,
FUN = "median",
na.rm = T)
med$deseq.fc.pdc <- 2 ^ med$deseq.lfc.pdc
med
deseq_lfc_stats = compare_means(deseq.lfc.pds ~ class, data = results)
deseq_lfc_stats
write_tsv(deseq_lfc_stats,
glue("{stat_output}pds-deseq2_lfc_statistics.tsv"))
deseq_lfc_stats = compare_means(deseq.lfc.pdc ~ class, data = results)
deseq_lfc_stats
write_tsv(deseq_lfc_stats,
glue("{stat_output}phendc-deseq2_lfc_statistics.tsv"))
mdf <- reshape2::melt(dplyr::select(
results,
c(
"class",
"raw.lfc.pds_1",
"raw.lfc.pds_2",
"raw.lfc.pdc_1",
"raw.lfc.pdc_2"
)
))
Using class as id variables
ggviolin(
mdf,
x = "variable",
y = "value",
fill = "class",
palette = mypal,
add = "median_iqr"
) + geom_hline(yintercept = 0, linetype = "dotted") + coord_cartesian(ylim =
c(-3, 3)) +
stat_compare_means(aes(group = class), label.y = 3.6, size = 2)
Warning: Removed 469 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 469 rows containing non-finite outside the scale range (`stat_summary()`).
Warning: Removed 469 rows containing non-finite outside the scale range
(`stat_compare_means()`).
ggsave(glue("{plot_folder}Violin_CTCF_lfc_individual_reps.pdf"), last_plot())
Saving 5 x 3 in image
Warning: Removed 469 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 469 rows containing non-finite outside the scale range (`stat_summary()`).
Warning: Removed 469 rows containing non-finite outside the scale range
(`stat_compare_means()`).
ggdensity(
results,
x = "raw.lfc.pds",
color = "class",
fill = "class",
palette = mypal
) + geom_vline(xintercept = 0, linetype = "dotted")
Warning: Removed 26 rows containing non-finite outside the scale range (`stat_density()`).
ggdensity(
results,
x = "raw.lfc.pdc",
color = "class",
fill = "class",
palette = mypal
) + geom_vline(xintercept = 0, linetype = "dotted")
Warning: Removed 23 rows containing non-finite outside the scale range (`stat_density()`).
ggscatter(
results,
x = "log2.mock",
y = "log2.pds",
size = 1,shape=20,
alpha = 0.1,
color = "deseq.sig.pds",
palette = mypal[c(5, 2)]
)
ggsave(glue("{plot_folder}Scatter_CTCF_DESEq_PDSvsMock.pdf"), rasterize(last_plot(),dpi = 600),width = 3, height= 3.2)
ggscatter(
results,
x = "log2.mock",
y = "log2.pdc",
size = 1,shape=20,
alpha = 0.1,
color = "deseq.sig.pdc",
palette = mypal[c(5, 2)]
)
ggsave(glue("{plot_folder}Scatter_CTCF_DESEq_PhenDC3vsMock.pdf"), rasterize(last_plot(),dpi = 600),width = 3, height= 3.2)
tb <- table(results$class)
deseq.stats <- as.data.frame(t(as.matrix(tb)))
deseq.stats[1,] <- colSums(deseq.stats)
rownames(deseq.stats) <- c("Total")
deseq.stats.percent <- deseq.stats
deseq.stats.percent[1,] <- c(100,100)
tb
CTCF_and_G4 CTCF_not_G4
2288 49325
tb <- table(results$deseq.sig.pds &
(results$deseq.lfc.pds > 0),
results$class)
deseq.stats <- rbind(deseq.stats, PDS.sig.up = as.data.frame.matrix(tb)[2,])
tb
CTCF_and_G4 CTCF_not_G4
FALSE 2220 46812
TRUE 68 2510
tb <- prop.table(table(results$deseq.sig.pds &
(results$deseq.lfc.pds > 0),
results$class),margin = 2)*100
deseq.stats.percent <- rbind(deseq.stats.percent, PDS.sig.up = as.data.frame.matrix(tb)[2,])
tb
CTCF_and_G4 CTCF_not_G4
FALSE 97.027972 94.910993
TRUE 2.972028 5.089007
table(results$deseq.sig.pds &
(results$deseq.lfc.pds > 0),
results$class)
CTCF_and_G4 CTCF_not_G4
FALSE 2220 46812
TRUE 68 2510
mdf <- reshape2::melt(table(
results$deseq.sig.pds & (results$deseq.lfc.pds > 0),
results$class
))
ggplot(mdf, aes(Var1, Var2, fill = value)) +
geom_tile(show.legend = F) + geom_text(aes(label = value)) +
scale_fill_gradient(low = "white", high = "orange") + theme_minimal()
vl <- list(
sig = grep("TRUE", results$deseq.sigup.pds),
CTCF_G4 = grep("and", results$class),
CTCFonly = grep("not", results$class)
)
plot(euler(vl), quantities = T)
tb <- table(results$deseq.sig.pdc &
(results$deseq.lfc.pdc > 0),
results$class)
deseq.stats <- rbind(deseq.stats, PhenDC3.sig.up = as.data.frame.matrix(tb)[2,])
tb
CTCF_and_G4 CTCF_not_G4
FALSE 2251 48545
TRUE 37 780
tb <- prop.table(table(results$deseq.sig.pdc &
(results$deseq.lfc.pdc > 0),
results$class),margin = 2)*100
deseq.stats.percent <- rbind(deseq.stats.percent, PhenDC3.sig.up = as.data.frame.matrix(tb)[2,])
tb
CTCF_and_G4 CTCF_not_G4
FALSE 98.382867 98.418652
TRUE 1.617133 1.581348
table(results$deseq.sig.pdc &
(results$deseq.lfc.pdc > 0),
results$class)
CTCF_and_G4 CTCF_not_G4
FALSE 2251 48545
TRUE 37 780
mdf <- reshape2::melt(table(
results$deseq.sig.pdc & (results$deseq.lfc.pdc > 0),
results$class
))
ggplot(mdf, aes(Var1, Var2, fill = value)) +
geom_tile(show.legend = F) + geom_text(aes(label = value)) +
scale_fill_gradient(low = "white", high = "orange") + theme_minimal()
results$uid <- seq(1:nrow(results))
vl <- list(
sig = grep("TRUE", results$deseq.sigup.pdc),
CTCF_G4 = grep("and", results$class),
CTCFonly = grep("not", results$class)
)
plot(euler(vl), quantities = T)
table(results$deseq.sigup.pds, results$deseq.sigup.pdc)
FALSE TRUE
FALSE 48576 456
TRUE 2217 361
mdf <- reshape2::melt(table(results$deseq.sigup.pds, results$deseq.sigup.pdc))
ggplot(mdf, aes(Var1, Var2, fill = value)) +
geom_tile(show.legend = F) + geom_text(aes(label = value)) +
scale_fill_gradient(low = "white", high = "orange") + theme_minimal()
tb <- table(results$deseq.sigup.pds & results$deseq.sigup.pdc,
results$class)
deseq.stats <- rbind(deseq.stats, both.sig.up = as.data.frame.matrix(tb)[2,])
tb
CTCF_and_G4 CTCF_not_G4
FALSE 2277 48975
TRUE 11 350
tb <- prop.table(table(results$deseq.sigup.pds & results$deseq.sigup.pdc,
results$class),margin = 2)*100
deseq.stats.percent <- rbind(deseq.stats.percent, both = as.data.frame.matrix(tb)[2,])
tb
CTCF_and_G4 CTCF_not_G4
FALSE 99.5192308 99.2904207
TRUE 0.4807692 0.7095793
table(results$deseq.sigup.pds & results$deseq.sigup.pdc, results$class)
CTCF_and_G4 CTCF_not_G4
FALSE 2277 48975
TRUE 11 350
mdf <- reshape2::melt(table(results$deseq.sigup.pds & results$deseq.sigup.pdc, results$class))
ggplot(mdf, aes(Var1, Var2, fill = value)) +
geom_tile(show.legend = F) + geom_text(aes(label = value)) +
scale_fill_gradient(low = "white", high = "orange") + theme_minimal()
deseq.stats$class <- rownames(deseq.stats)
mdf <- reshape2::melt(deseq.stats)
Using class as id variables
ggplot(mdf, aes(variable, class, fill = value)) +
geom_tile(show.legend = F) + geom_text(aes(label = value)) +
scale_fill_gradient(low = "white", high = "orange") + theme_minimal()
ggsave(glue("{plot_folder}Table_CTCF_DESEq_Sigup.pdf"), rasterize(last_plot(),dpi = 600),width = 2.5, height= 2.5)
deseq.stats.percent$class <- rownames(deseq.stats.percent)
mdf <- reshape2::melt(deseq.stats.percent[-1,])
Using class as id variables
ggplot(mdf, aes(variable, class, fill = value)) +
geom_tile(show.legend = F) + geom_text(aes(label = round(mdf$value,2))) +
scale_fill_gradient(low = "white", high = "orange") + theme_minimal()
Warning: Use of `mdf$value` is discouraged.
ℹ Use `value` instead.
ggsave(glue("{plot_folder}Table_CTCF_DESEq_Sigup_percent.pdf"), rasterize(last_plot(),dpi = 600),width = 2.5, height= 2.5)
Warning: Use of `mdf$value` is discouraged.
ℹ Use `value` instead.
vl <- list(
sig_PDS = grep("TRUE", results$deseq.sigup.pds),
sig_PDC = grep("TRUE", results$deseq.sigup.pdc),
CTCF_G4 = grep("and", results$class),
CTCFonly = grep("not", results$class)
)
plot(euler(vl), quantities = T)
results$uid <- seq(1:nrow(results))
vl <- list(
sig_PDS = grep("TRUE", results$deseq.sigup.pds),
sig_PDC = grep("TRUE", results$deseq.sigup.pdc),
CTCF_G4 = grep("and", results$class)
)
plot(euler(vl), quantities = T)
results$sig.by.class.pds <- paste0(results$deseq.sigup.pds, "_", results$class)
results$psize <- 0.01
results$psize[results$deseq.sigup.pds &
results$class == "CTCF_and_G4"] <- 1
ggscatter(
results,
x = "log2.mock",
y = "log2.pds",
size = 0.5,
alpha = results$psize,
color = "sig.by.class.pds",
palette = c(mypal[5], mypal[5], mypal[5], mypal[2], mypal[1])
)
ggscatter(
results[!grepl("NA", results$sig.by.class.pds), ],
x = "log2.mock",
y = "deseq.lfc.pds",
size = 0.2,
alpha = "psize",
color = "sig.by.class.pds",
palette = c("#505050", "#505050", "red2", "blue")
)
ggscatterhist(
results[!grepl("NA", results$sig.by.class.pds), ],
x = "log2.mock",
y = "log2.pds",
size = 0.4,
alpha = "psize",
color = "sig.by.class.pds",
margin.params = list(
fill = "sig.by.class.pds",
color = "black",
size = 0.2
),
palette = c("#505050", "#505050", "red2", "blue")
)
Warning: Removed 13 rows containing non-finite outside the scale range (`stat_density()`).
Warning: Removed 10 rows containing non-finite outside the scale range (`stat_density()`).
ggscatter(
results,
x = "raw.lfc.pds",
y = "log2.G4",
size = 0.2,
alpha = 0.1,
color = "deseq.sig.pds",
cor.coef = T,
palette = c("#888888",mypal[2])
)
Warning: Removed 16803 rows containing non-finite outside the scale range (`stat_cor()`).
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).
ggsave(glue("{plot_folder}Scatter_G4_vs_LFC_PDS.pdf"),
plot = rasterize(last_plot()))
Saving 3 x 3 in image
Warning: Removed 16803 rows containing non-finite outside the scale range (`stat_cor()`).
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).
ggscatter(
results,
x = "raw.lfc.pdc",
y = "log2.G4",
size = 0.2,
alpha = 0.1,
color = "deseq.sig.pdc",
cor.coef = T,
palette = c("#888888",mypal[2])
)
Warning: Removed 16800 rows containing non-finite outside the scale range (`stat_cor()`).
ggsave(glue("{plot_folder}Scatter_G4_vs_LFC_PDC.pdf"),
plot = rasterize(last_plot()))
Saving 3 x 3 in image
Warning: Removed 16800 rows containing non-finite outside the scale range (`stat_cor()`).
results$G4.quantile <- dplyr::ntile(results$G4_NT, n = 5)
results$pds.quantile <- dplyr::ntile(results$mean.pds-results$mean.mock, n = 4)
results$pdc.quantile <- dplyr::ntile(results$mean.pdc-results$mean.mock, n = 4)
tb <- table(results$pds.quantile, results$class)
tb
CTCF_and_G4 CTCF_not_G4
1 540 12364
2 639 12264
3 552 12351
4 557 12346
ggbarplot(as.data.frame(t(as.matrix(tb))),"Var2","Freq",fill="Var1",palette = c(mypal[1],"#aaaaaa"),label = TRUE, lab.pos="in",ylim=c(11000,13000))
ggsave(glue("{plot_folder}Barplot_peakCat_by_PDSquantile.pdf"),
plot = last_plot())
Saving 4 x 3 in image
ggviolin(
results,
x = "pds.quantile",
y = "deseq.lfc.pds",
fill = mypal[3],
add = "median_iqr"
) + coord_cartesian(ylim = c(-1.5, 1.5)) + geom_hline(yintercept = 0, linetype =
"dotted")
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_summary()`).
ggsave(glue("{plot_folder}Violin_lfcPDS_by_PDSquantile.pdf"),
plot = last_plot())
Saving 3 x 3 in image
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range (`stat_summary()`).
top25.pds <- makeGRangesFromDataFrame(results[results$pds.quantile==4,1:3],na.rm=T)
bot75.pds <- makeGRangesFromDataFrame(results[results$pds.quantile!=4,1:3],na.rm=T)
p1 <- plot_bw_profile(bwfiles = geo_bigwigs,loci = top25.pds, mode = "center",verbose = F, colors = mypal) + coord_cartesian(ylim=c(0,6))
Warning in .summarize_matrix(fg, label) :
Profile plot: 658 generated ( 0.0509958924281175 per locus)
Warning in .summarize_matrix(fg, label) :
Profile plot: 658 generated ( 0.0509958924281175 per locus)
Warning in .summarize_matrix(fg, label) :
Profile plot: 658 generated ( 0.0509958924281175 per locus)
p2 <- plot_bw_profile(bwfiles = geo_bigwigs,loci = bot75.pds, mode = "center",verbose = F, colors = mypal) + coord_cartesian(ylim=c(0,6))
Warning in .summarize_matrix(fg, label) :
Profile plot: 2056 generated ( 0.0531128907259106 per locus)
Warning in .summarize_matrix(fg, label) :
Profile plot: 2056 generated ( 0.0531128907259106 per locus)
Warning in .summarize_matrix(fg, label) :
Profile plot: 2056 generated ( 0.0531128907259106 per locus)
ggarrange(p1,p2, ncol=2)
ggsave(glue("{plot_folder}Profile_CTCF_top25_PDSquantile.pdf"),
plot = last_plot())
Saving 6 x 3 in image
top25.pds.CTCF_G4 <- makeGRangesFromDataFrame(results[(results$pds.quantile==4 & results$class == "CTCF_and_G4"),1:3],na.rm=T)
top25.pds.CTCF_only <- makeGRangesFromDataFrame(results[(results$pds.quantile==4 & results$class == "CTCF_not_G4"),1:3],na.rm=T)
p1 <- plot_bw_profile(bwfiles = geo_bigwigs[1:2],loci = top25.pds.CTCF_G4, mode = "center",verbose = F, colors = mypal[c(1,2)]) + coord_cartesian(ylim=c(0,10))
p2 <- plot_bw_profile(bwfiles = geo_bigwigs[1:2],loci = top25.pds.CTCF_only, mode = "center",verbose = F, colors = mypal[c(1,2)]) + coord_cartesian(ylim=c(0,10))
Warning in .summarize_matrix(fg, label) :
Profile plot: 639 generated ( 0.0517576543009882 per locus)
Warning in .summarize_matrix(fg, label) :
Profile plot: 639 generated ( 0.0517576543009882 per locus)
ggarrange(p1,p2, ncol=2)
ggsave(glue("{plot_folder}Profile_CTCF_top25_PDSquantile_byG4overlap.pdf"),
plot = last_plot())
Saving 6 x 3 in image
top25.pds.CTCF_G4 <- makeGRangesFromDataFrame(results[(results$pds.quantile==4 & results$class == "CTCF_and_G4"),1:3],na.rm=T)
top25.pds.CTCF_only <- makeGRangesFromDataFrame(results[(results$pds.quantile==4 & results$class == "CTCF_not_G4"),1:3],na.rm=T)
p1 <- plot_bw_profile(bwfiles = geo_bigwigs[2],bg_bwfiles = geo_bigwigs[1], loci = top25.pds.CTCF_G4, mode = "center",verbose = F, colors = mypal[2],norm_mode = "log2fc",show_error = T) + coord_cartesian(ylim=c(-1,1))
Warning in plot_bw_profile(bwfiles = geo_bigwigs[2], bg_bwfiles = geo_bigwigs[1], :
Stderr estimate not available when normalizing by input
p2 <- plot_bw_profile(bwfiles = geo_bigwigs[2],bg_bwfiles = geo_bigwigs[1], loci = top25.pds.CTCF_only, mode = "center",verbose = F, colors = mypal[2],norm_mode = "log2fc",show_error = T) + coord_cartesian(ylim=c(-1,1))
Warning in .summarize_matrix(fg, label) :
Profile plot: 639 generated ( 0.0517576543009882 per locus)
Warning in .summarize_matrix(bg, "bg") :
Profile plot: 639 generated ( 0.0517576543009882 per locus)
Warning in plot_bw_profile(bwfiles = geo_bigwigs[2], bg_bwfiles = geo_bigwigs[1], :
Stderr estimate not available when normalizing by input
ggarrange(p1,p2, ncol=2)
ggsave(glue("{plot_folder}Profile_CTCFfoldchange_top25_PDSquantile_byG4.pdf"),
plot = last_plot())
Saving 6 x 3 in image
top25.pds.CTCF_G4 <- makeGRangesFromDataFrame(results[(results$pds.quantile==4 & results$class == "CTCF_and_G4"),1:3],na.rm=T)
top25.pds.CTCF_only <- makeGRangesFromDataFrame(results[(results$pds.quantile==4 & results$class == "CTCF_not_G4"),1:3],na.rm=T)
p1 <- plot_bw_profile(bwfiles = G4_bigwigs, loci = top25.pds.CTCF_G4, mode = "center",verbose = F, colors = mypal[3],norm_mode = "log2fc") + coord_cartesian(ylim=c(0,10))
p2 <- plot_bw_profile(bwfiles = G4_bigwigs, loci = top25.pds.CTCF_only, mode = "center",verbose = F, colors = mypal[3],norm_mode = "log2fc") + coord_cartesian(ylim=c(0,10))
ggarrange(p1,p2, ncol=2)
ggsave(glue("{plot_folder}Profile_G4_top25_PDSquantile.pdf"),
plot = last_plot())
Saving 6 x 3 in image
ord.top25 <- order(rowMeans(as.data.frame(bw_heatmap(bwfiles = geo_bigwigs[1],loci = top25.pds, mode = "center"))))
p1 <- plot_bw_heatmap(bwfiles = geo_bigwigs[1],loci = top25.pds, mode = "center",verbose = F, zmax = 10, max_rows_allowed = 50, order_by = ord.top25)
Warning in .preprocess_heatmap_matrix(values[[1]], zmin, zmax, order_by, :
Large matrix: 12903. Downscaled to 50
p2 <- plot_bw_heatmap(bwfiles = geo_bigwigs[2],loci = top25.pds, mode = "center",verbose = F, zmax = 10, max_rows_allowed = 50, order_by = ord.top25)
Warning in .preprocess_heatmap_matrix(values[[1]], zmin, zmax, order_by, :
Large matrix: 12903. Downscaled to 50
ggarrange(p1,p2, ncol=2, common.legend = T)
ggsave(glue("{plot_folder}Heatmap_CTCF_top25_PDSquantile.pdf"),
plot = last_plot())
Saving 3 x 3 in image
ord.bot75 <- order(rowMeans(as.data.frame(bw_heatmap(bwfiles = geo_bigwigs[1],loci = bot75.pds, mode = "center"))))
p1 <- plot_bw_heatmap(bwfiles = geo_bigwigs[1],loci = bot75.pds, mode = "center",verbose = F, zmax = 10, max_rows_allowed = 200, order_by = ord.bot75)
Warning in .preprocess_heatmap_matrix(values[[1]], zmin, zmax, order_by, :
Large matrix: 38710. Downscaled to 200
p2 <- plot_bw_heatmap(bwfiles = geo_bigwigs[2],loci = bot75.pds, mode = "center",verbose = F, zmax = 10, max_rows_allowed = 200, order_by = ord.bot75)
Warning in .preprocess_heatmap_matrix(values[[1]], zmin, zmax, order_by, :
Large matrix: 38710. Downscaled to 200
ggarrange(p1,p2, ncol=2, common.legend = T)
ggsave(glue("{plot_folder}Heatmap_CTCF_bot75_PDSquantile.pdf"),
plot = last_plot())
Saving 3 x 4 in image
G4_CnR <- bw_loci(c("../mendeley/FigureS2/GSM6634325_E14_Mock_G4.bw","../mendeley/FigureS2/GSM6634326_E14_PDS_G4.bw"),loci = makeGRangesFromDataFrame(results,na.rm=T),labels = c("Mock","PDS"))
results$G4_CnR_delta <- G4_CnR$PDS-G4_CnR$Mock
results$G4_CnR_lfc <- log2(G4_CnR$PDS/G4_CnR$Mock)
results$G4_CnR_mock <- G4_CnR$Mock
ggviolin(
results,
x = "pds.quantile",
y = "G4_CnR_delta",
fill = mypal[2],
add = "median_iqr"
) + coord_cartesian(ylim = c(-10, 15)) + geom_hline(yintercept = 0, linetype =
"dotted")
ggsave(glue("{plot_folder}Violin_G4_CnR_delta_by_PDSquantile.pdf"),
plot = last_plot())
Saving 3 x 2 in image
compare_means(G4_CnR_delta ~ pds.quantile, results)
results$pds.quartile.top25 <- results$pds.quantile == 4
mean(na.omit(results$G4_CnR_delta))
[1] 2.193939
mean(results$G4_CnR_delta[results$pds.quartile.top25])
[1] 2.32057
mean(results$G4_CnR_delta[!results$pds.quartile.top25])
[1] 2.151729
compare_means(G4_CnR_delta ~ pds.quartile.top25, results)
export.bed(top25.pds,"../peaks/CTCF_peaks_top25_pds_up.bed")
export.bed(bot75.pds,"../peaks/CTCF_peaks_bot75_pds_up.bed")
ggviolin(
results,
x = "G4.quantile",
y = "deseq.lfc.pds",
fill = mypal[1],
add = "median_iqr"
) + coord_cartesian(ylim = c(-2, 2)) + geom_hline(yintercept = 0, linetype =
"dotted")
ggsave(glue("{plot_folder}Violin_CTCF_lfc_by_G4quantile.pdf"),
plot = last_plot())
ggviolin(
results,
x = "G4.quantile",
y = "log2.G4",
fill = mypal2[5],
add = "median_iqr"
) + geom_hline(yintercept = 0, linetype = "dotted")
ggsave(glue("{plot_folder}Violin_G4_by_G4quantile.pdf"), plot = last_plot())
peak_cats <- bedscout::import_named_bed_into_list(peaks_bed)
plot_bw_profile(
G4_bigwigs,
peak_cats,
labels = c(
peak_cats[[1]][1, ]$name,
peak_cats[[2]][1, ]$name,
peak_cats[[3]][1, ]$name,
peak_cats[[4]][1, ]$name
),
mode = "center",
show_error = T,
verbose = F,
remove_top = 0.001,
colors = mypal
)
plot_bw_profile(
combined_bigwigs[1],
peak_cats,
labels = c(
peak_cats[[1]][1, ]$name,
peak_cats[[2]][1, ]$name,
peak_cats[[3]][1, ]$name,
peak_cats[[4]][1, ]$name
),
mode = "center",
show_error = T,
verbose = F,
remove_top = 0.001,
colors = mypal
)
p1 <- plot_bw_profile(
c(mocks_bigwigs, pds_bigwigs),
peak_cats[[1]],
labels = c("mock1", "mock2", "trt1", "rtr2"),
mode = "center",
show_error = T,
verbose = F,
remove_top = 0.001,
colors = mypal2[c(9, 10, 5, 6)],
upstream = 1500,
downstream = 1500
)
p2 <- plot_bw_profile(
c(mocks_bigwigs, pds_bigwigs),
peak_cats[[2]],
labels = c("mock1", "mock2", "trt1", "rtr2"),
mode = "center",
show_error = T,
verbose = F,
remove_top = 0.001,
colors = mypal2[c(9, 10, 5, 6)],
upstream = 1500,
downstream = 1500
)
p3 <- plot_bw_profile(
c(mocks_bigwigs, pds_bigwigs),
peak_cats[[3]],
labels = c("mock1", "mock2", "trt1", "rtr2"),
mode = "center",
show_error = T,
verbose = F,
remove_top = 0.001,
colors = mypal2[c(9, 10, 5, 6)],
upstream = 1500,
downstream = 1500
)
p4 <- plot_bw_profile(
c(mocks_bigwigs, pds_bigwigs),
peak_cats[[4]],
labels = c("mock1", "mock2", "trt1", "rtr2"),
mode = "center",
show_error = T,
verbose = F,
remove_top = 0.001,
colors = mypal2[c(9, 10, 5, 6)],
upstream = 1500,
downstream = 1500
)
ggarrange(p1, p2, p3, p4, ncol = 4, nrow = 1)
p1 <- plot_bw_profile(
c(mocks_bigwigs, pds_bigwigs),
peak_cats[[1]],
labels = c("mock1", "mock2", "trt1", "rtr2"),
mode = "center",
show_error = T,
verbose = F,
remove_top = 0.001,
colors = mypal2[c(9, 10, 3, 4)],
upstream = 1500,
downstream = 1500
)
p2 <- plot_bw_profile(
c(mocks_bigwigs, pds_bigwigs),
peak_cats[[2]],
labels = c("mock1", "mock2", "trt1", "rtr2"),
mode = "center",
show_error = T,
verbose = F,
remove_top = 0.001,
colors = mypal2[c(9, 10, 3, 4)],
upstream = 1500,
downstream = 1500
)
p3 <- plot_bw_profile(
c(mocks_bigwigs, pds_bigwigs),
peak_cats[[3]],
labels = c("mock1", "mock2", "trt1", "rtr2"),
mode = "center",
show_error = T,
verbose = F,
remove_top = 0.001,
colors = mypal2[c(9, 10, 3, 4)],
upstream = 1500,
downstream = 1500
)
p4 <- plot_bw_profile(
c(mocks_bigwigs, pds_bigwigs),
peak_cats[[4]],
labels = c("mock1", "mock2", "trt1", "rtr2"),
mode = "center",
show_error = T,
verbose = F,
remove_top = 0.001,
colors = mypal2[c(9, 10, 3, 4)],
upstream = 1500,
downstream = 1500
)
ggarrange(p1, p2, p3, p4, ncol = 4, nrow = 1)
sigup.pds <- results[results$deseq.sigup.pds, ]
sigup.pdc <- results[results$deseq.sigup.pdc, ]
#export.bed(sigup.pds,"peaks_sigup_pds.bed")
#export.bed(sigup.pds,"peaks_sigup_pds.bed")
gghistogram(results, "width", add = "median", fill = "class")
G4_bed <- import('../data/G4_CnT/G4_WT_peaks.narrowPeak')
ATAC_bed <- import('../data/ATAC-seq/ATAC_seq_mESC_Martire_peaks.narrowPeak')
pro_bed <- import('../data/regions/promoters_geneSymbol.mm10.bed')
G4_bed <- bedscout::annotate_overlapping_features(G4_bed, pro_bed, name_field = "name")
G4_bed$name <- "G4"
G4_bed$name[!is.na(G4_bed$nearby_features)] <- "G4pro"
G4_bed$name[grepl("pro", G4_bed$name)] <- "G4pro"
nearest_G4_1kb <- bedscout::annotate_nearby_features(
ctcf,
G4_bed,
distance_cutoff = 1000,
ignore.strand = T,
name_field = "name"
)
nearest_G4_2.5kb <- bedscout::annotate_nearby_features(
ctcf,
G4_bed,
distance_cutoff = 2500,
ignore.strand = T,
name_field = "name"
)
nearest_G4_5kb <- bedscout::annotate_nearby_features(
ctcf,
G4_bed,
distance_cutoff = 5000,
ignore.strand = T,
name_field = "name"
)
nearest_G4_10kb <- bedscout::annotate_nearby_features(
ctcf,
G4_bed,
distance_cutoff = 10000,
ignore.strand = T,
name_field = "name"
)
nearest_G4_50kb <- bedscout::annotate_nearby_features(
ctcf,
G4_bed,
distance_cutoff = 50000,
ignore.strand = T,
name_field = "name"
)
ctcf$nearest_G4 <- factor(">50kb",
levels = c("<1kb", "<2.5kb", "<5kb", "<10kb", "<50kb", ">50kb"))
ctcf$nearest_G4[!is.na(nearest_G4_50kb$nearby_features)] <- "<50kb"
ctcf$nearest_G4[!is.na(nearest_G4_10kb$nearby_features)] <- "<10kb"
ctcf$nearest_G4[!is.na(nearest_G4_5kb$nearby_features)] <- "<5kb"
ctcf$nearest_G4[!is.na(nearest_G4_2.5kb$nearby_features)] <- "<2.5kb"
ctcf$nearest_G4[!is.na(nearest_G4_1kb$nearby_features)] <- "<1kb"
ctcf$nearest_G4_type <- "none"
ctcf$nearest_G4_type[!is.na(nearest_G4_50kb$nearby_features)] <- nearest_G4_50kb$nearby_features[!is.na(nearest_G4_50kb$nearby_features)]
ctcf$nearest_G4_type[!is.na(nearest_G4_10kb$nearby_features)] <- nearest_G4_10kb$nearby_features[!is.na(nearest_G4_10kb$nearby_features)]
ctcf$nearest_G4_type[!is.na(nearest_G4_5kb$nearby_features)] <- nearest_G4_5kb$nearby_features[!is.na(nearest_G4_5kb$nearby_features)]
ctcf$nearest_G4_type[!is.na(nearest_G4_2.5kb$nearby_features)] <- nearest_G4_2.5kb$nearby_features[!is.na(nearest_G4_2.5kb$nearby_features)]
ctcf$nearest_G4_type[!is.na(nearest_G4_1kb$nearby_features)] <- nearest_G4_1kb$nearby_features[!is.na(nearest_G4_1kb$nearby_features)]
ctcf$nearest_G4_type[grep("pro", ctcf$nearest_G4_type)] <- "G4pro"
results$nearest_G4 <- ctcf$nearest_G4
results$nearest_G4_type <- ctcf$nearest_G4_type
table(results$nearest_G4)
table(results$nearest_G4_type)
table(results$nearest_G4, results$nearest_G4_type)
ggviolin(
results,
x = "nearest_G4",
y = "mean.mock",
fill = mypal[1],
add = "median_iqr"
) + coord_cartesian(ylim = c(0, 10)) +
stat_compare_means(label.y = 8,
label.x = 3,
size = 3) +
geom_hline(yintercept = 0, linetype = "dotted")
ggsave(glue("{plot_folder}Violin_CTCF_signal_by_G4distance.pdf"),
plot = last_plot())
ggviolin(
results,
x = "nearest_G4",
y = "deseq.lfc.pds",
fill = mypal[1],
add = "median_iqr"
) + coord_cartesian(ylim = c(-4, 4)) + geom_hline(yintercept = 0, linewidth = 0.2) +
geom_hline(
yintercept = median(results$deseq.lfc.pds[results$nearest_G4 == "<1kb"]),
linetype = "dotted",
linewidth = 0.2
) +
stat_compare_means(label.y = 3,
label.x = 2,
size = 3)
ggsave(glue("{plot_folder}Violin_CTCF_PDS_lfc_by_G4distance.pdf"),
plot = last_plot())
nearest_G4_stats = compare_means(deseq.lfc.pds ~ nearest_G4, results)
nearest_G4_stats
write_tsv(nearest_G4_stats,
glue("{stat_output}pds_G4distance_plots-statistics.tsv"))
ggviolin(
results,
x = "nearest_G4",
y = "deseq.lfc.pdc",
fill = mypal[1],
add = "median_iqr"
) + coord_cartesian(ylim = c(-2, 2)) + geom_hline(yintercept = 0, linewidth = 0.2) + geom_hline(
yintercept = mean(results$deseq.lfc.pdc[results$nearest_G4 == "<1kb"]),
linetype = "dotted",
linewidth = 0.2
)
ggsave(glue("{plot_folder}Violin_CTCF_PhenDC3_lfc_by_G4distance.pdf"), plot = last_plot())
nearest_G4_stats = compare_means(deseq.lfc.pdc ~ nearest_G4, results)
nearest_G4_stats
write_tsv(nearest_G4_stats, glue("{stat_output}phendc_G4distance_plots-statistics.tsv"))
ggviolin(
results,
x = "nearest_G4",
y = "deseq.lfc.pds",
fill = "nearest_G4_type",
palette = mypal[c(1, 3, 5)],
add = "median_iqr"
) + coord_cartesian(ylim = c(-2, 4)) + geom_hline(yintercept = 0, linewidth = 0.2) + geom_hline(
yintercept = mean(results$deseq.lfc.pds[results$nearest_G4 == "<1kb"]),
linetype = "dotted",
linewidth = 0.2
) +
stat_compare_means(aes(group = nearest_G4_type))
ggsave(glue("{plot_folder}Violin_CTCF_PDS_lfc_by_G4distance_pro.pdf"),
plot = last_plot())
compare_means(deseq.lfc.pds ~ nearest_G4, results[results$nearest_G4_type ==
"G4", ])
compare_means(deseq.lfc.pds ~ nearest_G4, results[results$nearest_G4_type ==
"G4", ]) %>%
write_tsv(.,
glue(
"{stat_output}pds_G4distance_plots-nearest_G4-statistics.tsv"
))
compare_means(deseq.lfc.pds ~ nearest_G4, results[results$nearest_G4_type ==
"G4pro", ])
compare_means(deseq.lfc.pds ~ nearest_G4, results[results$nearest_G4_type ==
"G4pro", ]) %>%
write_tsv(.,
glue(
"{stat_output}pds_G4distance_plots-nearest_G4pro-statistics.tsv"
))
ggviolin(
results,
x = "nearest_G4",
y = "deseq.lfc.pdc",
fill = "nearest_G4_type",
palette = mypal[c(1, 3, 5)],
add = "median_iqr"
) + coord_cartesian(ylim = c(-2, 2)) + geom_hline(yintercept = 0, linewidth = 0.2) + geom_hline(
yintercept = mean(results$deseq.lfc.pdc[results$nearest_G4 == "<1kb"]),
linetype = "dotted",
linewidth = 0.2
)
ggsave(glue(
"{plot_folder}Violin_CTCF_PhenDC3_lfc_by_G4distance_pro.pdf"
),
plot = last_plot())
compare_means(deseq.lfc.pdc ~ nearest_G4, results[results$nearest_G4_type ==
"G4", ])
compare_means(deseq.lfc.pdc ~ nearest_G4, results[results$nearest_G4_type ==
"G4", ]) %>%
write_tsv(.,
glue(
"{stat_output}phendc_G4distance_plots-nearest_G4-statistics.tsv"
))
compare_means(deseq.lfc.pdc ~ nearest_G4, results[results$nearest_G4_type ==
"G4pro", ])
compare_means(deseq.lfc.pdc ~ nearest_G4, results[results$nearest_G4_type ==
"G4pro", ]) %>%
write_tsv(.,
glue(
"{stat_output}phendc_G4distance_plots-nearest_G4pro-statistics.tsv"
))
#results <- read.table(glue("{result_folder}foldchange_results.txt"))
results$class <- factor(results$class, levels = c("CTCF_and_G4", "CTCF_not_G4"))
G4_bed <- import('../data/G4_CnT/G4_WT_peaks.narrowPeak')
G4_bed$name <- "peak"
G4pred_bed <- import('../data/predG4/mm10_canonical_G4_PQS-regex.bed')
ATAC_bed <- import('../data/ATAC-seq/ATAC_seq_mESC_Martire_peaks.narrowPeak')
pro_bed <- import('../data/regions/promoters_geneSymbol.mm10.bed')
G4pred_bed <- bedscout::annotate_overlapping_features(G4pred_bed, G4_bed, name_field = "name")
G4pred_bed$name <- "G4pred"
G4pred_bed$name[grepl("peak", G4pred_bed$nearby_features)] <- "G4exp"
table(G4pred_bed$name)
nearest_G4_0kb <- bedscout::annotate_nearby_features(
ctcf,
G4pred_bed,
distance_cutoff = 0,
ignore.strand = T,
name_field = "name"
)
nearest_G4_1kb <- bedscout::annotate_nearby_features(
ctcf,
G4pred_bed,
distance_cutoff = 1000,
ignore.strand = T,
name_field = "name"
)
nearest_G4_2.5kb <- bedscout::annotate_nearby_features(
ctcf,
G4pred_bed,
distance_cutoff = 2500,
ignore.strand = T,
name_field = "name"
)
nearest_G4_5kb <- bedscout::annotate_nearby_features(
ctcf,
G4pred_bed,
distance_cutoff = 5000,
ignore.strand = T,
name_field = "name"
)
nearest_G4_10kb <- bedscout::annotate_nearby_features(
ctcf,
G4pred_bed,
distance_cutoff = 10000,
ignore.strand = T,
name_field = "name"
)
ctcf$nearest_G4 <- factor(">10kb",
levels = c("0kb", "<1kb", "<2.5kb", "<5kb", "<10kb", ">10kb"))
ctcf$nearest_G4[!is.na(nearest_G4_10kb$nearby_features)] <- "<10kb"
ctcf$nearest_G4[!is.na(nearest_G4_5kb$nearby_features)] <- "<5kb"
ctcf$nearest_G4[!is.na(nearest_G4_2.5kb$nearby_features)] <- "<2.5kb"
ctcf$nearest_G4[!is.na(nearest_G4_1kb$nearby_features)] <- "<1kb"
ctcf$nearest_G4[!is.na(nearest_G4_0kb$nearby_features)] <- "0kb"
results$nearest_G4 <- ctcf$nearest_G4
table(results$nearest_G4)
table(results$nearest_G4, results$pro)
mdf <- reshape2::melt(table(results$nearest_G4, results$pro))
ggplot(mdf, aes(Var1, Var2, fill = value)) +
geom_tile(show.legend = F) + geom_text(aes(label = value)) +
scale_fill_gradient(low = "white", high = "orange") + theme_minimal()
ggsave(glue("{plot_folder}Heatmap_CTCF_sites_by_predG4distance.pdf"),
plot = last_plot())
ggviolin(
results,
x = "nearest_G4",
y = "mean.mock",
fill = mypal[1],
add = "median_iqr"
) + coord_cartesian(ylim = c(0, 10)) + geom_hline(yintercept = 0, linetype =
"dotted") +
stat_compare_means(
aes(group = nearest_G4),
label.y = 8,
label.x = 2,
size = 3
)
ggsave(glue(
"{plot_folder}Violin_CTCF_signal_by_predG4distance.pdf"),
plot = last_plot())
ggviolin(
results,
x = "nearest_G4",
y = "deseq.lfc.pds",
fill = mypal[1],
add = "median_iqr"
) + coord_cartesian(ylim = c(-0.5, 1.5)) + geom_hline(yintercept = 0, linewidth = 0.2) + geom_hline(
yintercept = median(results$deseq.lfc.pds[results$nearest_G4 == "0kb"]),
linetype = "dotted",
linewidth = 0.2
) +
stat_compare_means(label.y = 1.3,
label.x = 3,
size = 2)
ggsave(glue("{plot_folder}Violin_CTCF_PDS_lfc_by_predG4distance.pdf"),
plot = last_plot())
compare_means(deseq.lfc.pds ~ nearest_G4, results)
ggviolin(
results,
x = "nearest_G4",
y = "deseq.lfc.pdc",
fill = mypal[1],
add = "median_iqr"
) + coord_cartesian(ylim = c(-0.5, 1.5)) + geom_hline(yintercept = 0, linewidth = 0.2) + geom_hline(
yintercept = mean(results$deseq.lfc.pdc[results$nearest_G4 == "0kb"]),
linetype = "dotted",
linewidth = 0.2
) +
stat_compare_means(label.y = 1.3,
label.x = 3,
size = 2)
ggsave(glue(
"{plot_folder}Violin_CTCF_PhenDC3_lfc_by_predG4distance.pdf"
),
plot = last_plot())
compare_means(deseq.lfc.pdc ~ nearest_G4, results)
ggviolin(
results,
x = "nearest_G4",
y = "deseq.lfc.pds",
fill = "pro",
palette = mypal[c(1, 3, 5)],
add = "median_iqr"
) + coord_cartesian(ylim = c(-0.5, 1.5)) + geom_hline(yintercept = 0, linewidth = 0.2) + geom_hline(
yintercept = mean(results$deseq.lfc.pds[results$nearest_G4 == "0kb"]),
linetype = "dotted",
linewidth = 0.2
) +
stat_compare_means(aes(group = pro),
label.y = 1.3,
label.x = 3,
size = 1.5)
ggsave(glue("{plot_folder}Violin_CTCF_PDS_lfc_by_predG4distance_pro.pdf"), plot = last_plot())
```{ r } compare_means(deseq.lfc.pds ~ nearest_G4, results[results\(pro == "noPro", ]) compare_means(deseq.lfc.pds ~ nearest_G4, results[results\)pro == “noPro”, ]) %>% write_tsv(., “pds_G4distance_plots-noPro-statistics.tsv”)
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuY29tcGFyZV9tZWFucyhkZXNlcS5sZmMucGRzIH4gbmVhcmVzdF9HNCwgcmVzdWx0c1tyZXN1bHRzJHBybyA9PSBcIlByb1wiLCBdKSAlPiVcbiAgd3JpdGVfdHN2KC4sXG4gICAgICAgICAgICBnbHVlKFwie3N0YXRfb3V0cHV0fXBkc19HNGRpc3RhbmNlX3Bsb3RzLVByby1zdGF0aXN0aWNzLnRzdlwiKSlcblxuY29tcGFyZV9tZWFucyhkZXNlcS5sZmMucGRzIH4gcHJvLCBncm91cC5ieSA9IFwibmVhcmVzdF9HNFwiLCByZXN1bHRzKSAlPiVcbiAgd3JpdGVfdHN2KC4sXG4gICAgICAgICAgICBnbHVlKFxuICAgICAgICAgICAgICBcIntzdGF0X291dHB1dH1wZHNfRzRkaXN0YW5jZV9wbG90cy1iZXR3ZWVuX3Byb21fdHlwZXNfc3RhdHMudHN2XCJcbiAgICAgICAgICAgICkpXG5cbmNvbXBhcmVfbWVhbnMoZGVzZXEubGZjLnBkcyB+IG5lYXJlc3RfRzQsIGdyb3VwLmJ5ID0gXCJwcm9cIiwgcmVzdWx0cykgJT4lXG4gIHdyaXRlX3RzdiguLFxuICAgICAgICAgICAgZ2x1ZShcbiAgICAgICAgICAgICAgXCJ7c3RhdF9vdXRwdXR9cGRzX0c0ZGlzdGFuY2VfcGxvdHMtd2l0aGluX3Byb21fdHlwZXNfc3RhdHMudHN2XCJcbiAgICAgICAgICAgICkpXG5cbmBgYCJ9 -->
```r
compare_means(deseq.lfc.pds ~ nearest_G4, results[results$pro == "Pro", ]) %>%
write_tsv(.,
glue("{stat_output}pds_G4distance_plots-Pro-statistics.tsv"))
compare_means(deseq.lfc.pds ~ pro, group.by = "nearest_G4", results) %>%
write_tsv(.,
glue(
"{stat_output}pds_G4distance_plots-between_prom_types_stats.tsv"
))
compare_means(deseq.lfc.pds ~ nearest_G4, group.by = "pro", results) %>%
write_tsv(.,
glue(
"{stat_output}pds_G4distance_plots-within_prom_types_stats.tsv"
))
ggviolin(
results,
x = "nearest_G4",
y = "deseq.lfc.pdc",
fill = "pro",
palette = mypal[c(1, 3, 5)],
add = "median_iqr"
) + coord_cartesian(ylim = c(-0.5, 1.5)) + geom_hline(yintercept = 0, linewidth = 0.2) + geom_hline(
yintercept = mean(results$deseq.lfc.pdc[results$nearest_G4 == "0kb"]),
linetype = "dotted",
linewidth = 0.2
) +
stat_compare_means(aes(group = pro),
label.y = 1.3,
label.x = 3,
size = 1.5)
ggsave(glue(
"{plot_folder}Violin_CTCF_PhenDC3_lfc_by_predG4distance_pro.pdf"
),
plot = last_plot())
compare_means(deseq.lfc.pdc ~ nearest_G4, results[results$pro == "noPro", ]) %>%
write_tsv(.,
glue(
"{stat_output}phendc_G4distance_plots-noPro-statistics.tsv"
))
compare_means(deseq.lfc.pdc ~ pro, group.by = "nearest_G4", results) %>%
write_tsv(.,
glue(
"{stat_output}phendc_G4distance_plots-between_prom_types_stats.tsv"
))
compare_means(deseq.lfc.pdc ~ nearest_G4, group.by = "pro", results) %>%
write_tsv(.,
glue(
"{stat_output}phendc_G4distance_plots-within_prom_types_stats.tsv"
))